{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "outputs": [],
   "source": [
    "def look_img(img):\n",
    "    img_RGB = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)\n",
    "    plt.imshow(img_RGB)\n",
    "    plt.show()\n",
    "\n",
    "weights = \"yolov3.weights\"\n",
    "config_file = \"yolov3.cfg\"\n",
    "net = cv2.dnn.readNet(weights, config_file)\n",
    "layersNames = net.getLayerNames()\n",
    "output_layers_names = [layersNames[i[0]-1] for i in net.getUnconnectedOutLayers()]\n",
    "\n",
    "with open('coco.names','r')as f:\n",
    "    classes = f.read().splitlines()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "CONF_THRES = 0.2\n",
    "NMS_THRES = 0.4"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "outputs": [],
   "source": [
    "def process_frame(img):\n",
    "    height,width,_ = img.shape\n",
    "    blob = cv2.dnn.blobFromImage(img, 1/255, (416,416),(0,0,0),swapRB=True,crop=False)\n",
    "    net.setInput(blob)\n",
    "    # 前向推断\n",
    "    prediction = net.forward(output_layers_names)\n",
    "\n",
    "    # 存放预测框的坐标\n",
    "    boxes = []\n",
    "    # 存放(有无物体)置信度\n",
    "    objectess = []\n",
    "    # 存放类别概率\n",
    "    class_probs = []\n",
    "    # 存放预测框类别的索引号\n",
    "    class_ids = []\n",
    "    # 存放预测框类别名称\n",
    "    class_names = []\n",
    "\n",
    "    # 遍历三种尺度\n",
    "    for scale in prediction:\n",
    "        for bbox in scale:\n",
    "            obj = bbox[4]\n",
    "            class_scores = bbox[5:]\n",
    "\n",
    "            # 以下代码为改进代码，加入后运行速度能有一定提升，具体分析看下面的文章\n",
    "            # temp = np.max(class_scores)\n",
    "            # temp1 = obj * temp\n",
    "            # if(temp1<CONF_THRES):\n",
    "            #     continue\n",
    "\n",
    "            class_id = np.argmax(class_scores)\n",
    "            class_name = classes[class_id]\n",
    "            class_prob = class_scores[class_id]\n",
    "\n",
    "            # 获取预测框中心点坐标、预测框宽高\n",
    "            center_x = int(bbox[0]*width)\n",
    "            center_y = int(bbox[1]*height)\n",
    "            w = int(bbox[2]*width)\n",
    "            h = int(bbox[3]*height)\n",
    "            # 预测框左上角坐标\n",
    "            x = int(center_x - w/2)\n",
    "            y = int(center_y - h/2)\n",
    "\n",
    "            boxes.append([x,y,w,h])\n",
    "            objectess.append(float(obj))\n",
    "            class_ids.append(class_id)\n",
    "            class_names.append(class_name)\n",
    "            class_probs.append(class_prob)\n",
    "\n",
    "    confidences = np.array(class_probs)*np.array(objectess) # obj*class_scoreen\n",
    "    indexes = cv2.dnn.NMSBoxes(boxes,confidences,CONF_THRES,NMS_THRES) #阈值和极大值抑制\n",
    "    indexes.flatten()\n",
    "\n",
    "    colors = np.random.uniform(0,255,size=(len(boxes),3)) # 随机给每一个预测框生成一种颜色\n",
    "    # 遍历留下的每一个预测框，可视化\n",
    "    for i in indexes.flatten():\n",
    "        # 获取坐标与置信度\n",
    "        x, y, w, h = boxes[i]\n",
    "        confidence = str(round(confidences[i],2))\n",
    "        # 获取颜色，画框\n",
    "        color = colors[i % len(colors)]\n",
    "        # color = [255, 0, 255]\n",
    "        cv2.rectangle(img,(x,y),(w+x,h+y),color,4)\n",
    "        # 写上类别名称与置信度\n",
    "        string = '{} {}'.format(class_names[i],confidence)\n",
    "        # 图片，文字，左上坐标，字体，字体大小，颜色，字体粗细\n",
    "        cv2.putText(img,string,(x,y+20),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),1)\n",
    "    # look_img(img)\n",
    "    return img"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "outputs": [],
   "source": [
    "import cv2\n",
    "\n",
    "def video_demo():\n",
    "    capture = cv2.VideoCapture(0)#0为电脑内置摄像头\n",
    "    while(True):\n",
    "        success, frame = capture.read()#摄像头读取,ret为是否成功打开摄像头,true,false。 frame为视频的每一帧图像\n",
    "        if not success:\n",
    "            print('ERROR')\n",
    "            break\n",
    "        frame = cv2.flip(frame, 1)#摄像头是和人对立的，将图像左右调换回来正常显示。\n",
    "        frame = process_frame(frame)\n",
    "        cv2.imshow(\"video\", frame)\n",
    "        c = cv2.waitKey(50)\n",
    "        if c == 27: # 当按下esc\n",
    "            capture.release()\n",
    "            break\n",
    "\n",
    "video_demo()\n",
    "cv2.destroyAllWindows()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 发现帧数很低，思考问题所在：系统自带摄像头不行/处理代码速度较慢"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "outputs": [],
   "source": [
    "# 运行系统摄像头，检验好坏\n",
    "import cv2\n",
    "\n",
    "def video_demo():\n",
    "    capture = cv2.VideoCapture(0)#0为电脑内置摄像头\n",
    "    while(True):\n",
    "        success, frame = capture.read()#摄像头读取,ret为是否成功打开摄像头,true,false。 frame为视频的每一帧图像\n",
    "        if not success:\n",
    "            print('ERROR')\n",
    "            break\n",
    "        frame = cv2.flip(frame, 1)#摄像头是和人对立的，将图像左右调换回来正常显示。\n",
    "        cv2.imshow(\"video\", frame)\n",
    "        c = cv2.waitKey(50)\n",
    "        if c == 27: # 当按下esc\n",
    "            capture.release()\n",
    "            break\n",
    "\n",
    "video_demo()\n",
    "cv2.destroyAllWindows()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "outputs": [],
   "source": [
    "# 测试代码各模块耗时\n",
    "import time\n",
    "def process_frame1(img):\n",
    "    start = time.time()\n",
    "    height,width,_ = img.shape\n",
    "    blob = cv2.dnn.blobFromImage(img, 1/255, (416,416),(0,0,0),swapRB=True,crop=False)\n",
    "    end0 = time.time()\n",
    "\n",
    "    net.setInput(blob)\n",
    "    # 前向推断\n",
    "    prediction = net.forward(output_layers_names)\n",
    "    end1 = time.time()\n",
    "\n",
    "    # 存放预测框的坐标\n",
    "    boxes = []\n",
    "    # 存放(有无物体)置信度\n",
    "    objectess = []\n",
    "    # 存放类别概率\n",
    "    class_probs = []\n",
    "    # 存放预测框类别的索引号\n",
    "    class_ids = []\n",
    "    # 存放预测框类别名称\n",
    "    class_names = []\n",
    "    # 遍历三种尺度\n",
    "    for scale in prediction:\n",
    "        for bbox in scale:\n",
    "            obj = bbox[4]\n",
    "            class_scores = bbox[5:]\n",
    "\n",
    "            # temp = np.max(class_scores)\n",
    "            # temp1 = obj * temp\n",
    "            # if(temp1<CONF_THRES):\n",
    "            #     continue\n",
    "\n",
    "            class_id = np.argmax(class_scores)\n",
    "            class_name = classes[class_id]\n",
    "            class_prob = class_scores[class_id]\n",
    "\n",
    "            # 获取预测框中心点坐标、预测框宽高\n",
    "            center_x = int(bbox[0]*width)\n",
    "            center_y = int(bbox[1]*height)\n",
    "            w = int(bbox[2]*width)\n",
    "            h = int(bbox[3]*height)\n",
    "            # 预测框左上角坐标\n",
    "            x = int(center_x - w/2)\n",
    "            y = int(center_y - h/2)\n",
    "\n",
    "            boxes.append([x,y,w,h])\n",
    "            objectess.append(float(obj))\n",
    "            class_ids.append(class_id)\n",
    "            class_names.append(class_name)\n",
    "            class_probs.append(class_prob)\n",
    "    end2 = time.time()\n",
    "\n",
    "    confidences = np.array(class_probs)*np.array(objectess) # obj*class_scoreen\n",
    "    indexes = cv2.dnn.NMSBoxes(boxes,confidences,CONF_THRES,NMS_THRES) #阈值和极大值抑制\n",
    "    indexes.flatten()\n",
    "    end3 = time.time()\n",
    "\n",
    "    colors = np.random.uniform(0,255,size=(len(boxes),3)) # 随机给每一个预测框生成一种颜色\n",
    "    # 遍历留下的每一个预测框，可视化\n",
    "    for i in indexes.flatten():\n",
    "        # 获取坐标与置信度\n",
    "        x, y, w, h = boxes[i]\n",
    "        confidence = str(round(confidences[i],2))\n",
    "        # 获取颜色，画框\n",
    "        color = colors[i % len(colors)]\n",
    "        # color = [255, 0, 255]\n",
    "        cv2.rectangle(img,(x,y),(w+x,h+y),color,4)\n",
    "        # 写上类别名称与置信度\n",
    "        string = '{} {}'.format(class_names[i],confidence)\n",
    "        # 图片，文字，左上坐标，字体，字体大小，颜色，字体粗细\n",
    "        cv2.putText(img,string,(x,y+20),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),1)\n",
    "    # look_img(img)\n",
    "    end4 = time.time()\n",
    "\n",
    "    print(end0-start) # 图片预处理\n",
    "    print(end1-start) # 前向推断\n",
    "    print(end2-start) # 预测框遍历\n",
    "    print(end3-start) # 阈值和NMS处理\n",
    "    print(end4-start) # 绘制边框"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0020351409912109375\n",
      "0.432842493057251\n",
      "0.6452746391296387\n",
      "0.6472692489624023\n",
      "0.6482670307159424\n"
     ]
    }
   ],
   "source": [
    "img = cv2.imread('images/test4.jpg')\n",
    "process_frame1(img)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "    # 改进部分\n",
    "    for scale in prediction:\n",
    "        for bbox in scale:\n",
    "            obj = bbox[4]\n",
    "            class_scores = bbox[5:]\n",
    "\n",
    "            temp = np.max(class_scores)\n",
    "            temp1 = obj * temp\n",
    "            if(temp1<CONF_THRES):\n",
    "                continue\n",
    "\n",
    "            class_id = np.argmax(class_scores)\n",
    "            class_name = classes[class_id]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}